/* Notes

The twin modeling is based on version 3-0-0 of the TwinLife dataset. Access can be requested through GESIS here: https://search.gesis.org/research_data/ZA6701

The OpenMx code for Tables 1 and 3 uses the file twinlife.csv, which this script writes as its final step.

*/

* Working directory: the path is empty here; fill in the folder that holds
* the TwinLife person file before running.
cd ""

* Load the person-level file, replacing whatever is currently in memory.
use ZA6701_person_v3-0-0, clear

* Restrict to the variables used below.
keep fid pid ptyp pop* sop0106 zyg0102 eca0106 sex age0100 age0101 age0200 hoe* iva* wid

keep if ptyp<3 | ptyp==300 | ptyp==400 /* Keep twins and parents only */

* This filter must run BEFORE negative codes are turned into system missing
* below, because it tests the raw codes -92 and -95.
drop if zyg0102==-92 | zyg0102==-95 /* Drop if zygosity is missing */

keep if wid==1 /* keep only wave 1 data as interest isn't measured at wave 2 */

* Remove missing values: every negative code becomes system missing (.).
* Only numeric variables are visited; a string variable picked up by one of
* the wildcards above would otherwise stop the script with a type mismatch.
quietly ds, has(type numeric)
foreach var of varlist `r(varlist)' {
	replace `var' = . if `var'<0
}

*** Clean variables
* Interest in politics (pop0100, four categories): inspect the distribution,
* then copy it into the analysis variable.
tabulate pop0100
generate interest = pop0100

* Sex: move code 2 to 0 so the variable takes the values 0/1.
replace sex = 0 if sex == 2

* Age categories from age0200, upper bounds inclusive:
*   0: <=6   1: 7-12   2: 13-18   3: 19-25   (older or missing age -> .)
* Rows with ptyp above 3 are left missing.
generate agecat = cond(age0200 <= 6, 0, ///
	cond(age0200 <= 12, 1, ///
	cond(age0200 <= 18, 2, ///
	cond(age0200 <= 25, 3, .)))) if !(ptyp > 3)

*** Descriptive statistics: interest by sex, rows with an age category only

forvalues s = 0/1 {
	summarize interest if sex == `s' & !missing(agecat)
}

* Sex difference in interest: pooled first, then within age categories 1-3
ttest interest if !missing(agecat), by(sex)
forvalues a = 1/3 {
	ttest interest if agecat == `a', by(sex)
}

* Source items, the person id and the unused age variables are no longer needed.
drop sop* pop* iva* hoe* pid age0100 age0101

sort fid ptyp

* Copy each parent's value onto every row of the same family:
*   ptyp 300 -> m_<var>,  ptyp 400 -> f_<var>
* (presumably mother and father -- confirm against the TwinLife codebook).
* Step 1 takes the value on the parent's own row(s); step 2 spreads it over
* the family with total(), which stays missing when no such value exists.
local parent_tags  m f
local parent_codes 300 400

foreach var of varlist interest {
	forvalues k = 1/2 {
		local tag  : word `k' of `parent_tags'
		local code : word `k' of `parent_codes'

		tempvar own
		by fid: egen `own' = mean(`var') if ptyp == `code'
		by fid: egen `tag'_`var' = total(`own'), missing
		drop `own'
	}
}

* Parent rows have served their purpose; keep the rows with ptyp up to 3.
drop if ptyp > 3

* Zygosity and grouping variables

rename zyg0102 zyg

* r is 1 for zyg==1 and 0.5 for zyg==2 (missing otherwise).
generate r = cond(zyg == 1, 1, cond(zyg == 2, 0.5, .))

generate mz = zyg

* Zygosity-by-sex group:
*   1 = mzm (mz 1, sex 1)   2 = dzm (mz 2, sex 1)
*   3 = mzf (mz 1, sex 0)   4 = dzf (mz 2, sex 0)
generate mzsex = mz + 2 * (1 - sex) if inlist(mz, 1, 2) & inlist(sex, 0, 1)

* Zygosity-by-age group:
*   1 = mz1   2 = dz1   (agecat 1)
*   3 = mz2   4 = dz2   (agecat 2)
*   5 = mz3   6 = dz3   (agecat 3)
generate mzage = mz + 2 * (agecat - 1) if inlist(mz, 1, 2) & inlist(agecat, 1, 2, 3)

* Parental interest: the mean of m_interest and f_interest when both are
* observed, otherwise whichever one is observed (missing if neither is).
egen p_interest = rowmean(m_interest f_interest)

* One summarize call supplies the cut-offs for both splits. They are held in
* temporary scalars so the comparisons use full double precision.
quietly summarize p_interest
tempname p_mean p_low p_high
scalar `p_mean' = r(mean)
scalar `p_low'  = r(mean) - r(sd)
scalar `p_high' = r(mean) + r(sd)

* Two-way split at the mean: 0 = below the mean, 1 = at or above the mean.
* A value exactly equal to the mean is assigned to group 1 instead of being
* silently left missing.
gen p_interest2 = (p_interest >= `p_mean') if !missing(p_interest)

* Three-way split at mean -/+ 1 SD:
*   0 = below (mean - sd), 1 = within one SD (bounds included), 2 = above (mean + sd)
* Including the bounds in the middle group keeps values that fall exactly on
* a cut-off from being left missing.
gen p_interest3 = 0 if p_interest < `p_low'
replace p_interest3 = 1 if p_interest >= `p_low' & p_interest <= `p_high'
replace p_interest3 = 2 if p_interest > `p_high' & !missing(p_interest)

* Save the long-format file (one row per twin) before going wide.
outsheet using twinlife_long.csv, comma nolabel replace

* Reshape data
* interest gets a trailing underscore so the wide columns come out as
* interest_<ptyp>; every other variable stays as a single column per family.

rename interest interest_

reshape wide interest_, i(fid) j(ptyp)

* Randomize Twin 1 and Twin 2
* Each family gets a 0/1 flag from the sign of a standard-normal draw; the
* fixed seed makes the assignment reproducible. Where the flag is 1, the
* twin-1 and twin-2 columns are swapped.

set seed 08092018
capture drop rand
gen rand = rnormal() > 0

foreach var of varlist *1 {

	* Remove only the trailing "1". Deleting the first "1" found anywhere in
	* the name would build the wrong stem for names containing an earlier "1".
	local stem = substr("`var'", 1, length("`var'") - 1)
	display "`stem'"

	* Hold twin 1's values in a temporary variable of the same storage type
	* (no precision loss, no name-length limit), then swap within rand==1.
	local vtype : type `var'
	tempvar held
	gen `vtype' `held' = `var' if rand==1
	replace `var' = `stem'2 if rand==1
	replace `stem'2 = `held' if rand==1
	drop `held'

}

* Final wide file (one row per family).
outsheet using twinlife.csv, comma nolabel replace
